2015-10-19

創用 CC 授權條款

首先

恭喜Taiwan R User Group 三歲了

據說

今天有殘酷擂台

所以在這特別的日子

當然要講一些 有的沒有的有趣的主題

DQMSL

是什麼?可以吃嗎?

DQMSL = 勇者鬥惡龍怪物仙境

當初看到這個遊戲

身為骨灰級的玩家眼淚都要掉下來了啊

不知荒廢了多少的青春

既然叫做怪物仙境

就是有很多怪物的意思

為了這次的活動

我就稍微爬了一下怪物的資料

Load libraries

library(rvest)
library(stringr)
library(data.table)
library(googleVis)
library(ca)

Get urls

# Download the ranking page that lists the monsters.
ranking_pages = read_html("http://dqmsl-search.net/ranking/allsbjstatus?hide=&hides=,star1,star2,star3,star4")
# Absolute XPath to the <a> elements whose href points at a monster page.
urls_xpath = "/html/body/div[@class='mainh']/div[@class='mainc']/div[@class='ccol']/div[@class='mbox'][2]/div[@class='mboxb']/div/div[@class='innnerHideDiv']/div/div/a"
base_url = "http://dqmsl-search.net"
# Pull the href of every matched link, drop duplicates and prefix the site
# root. paste0() has no `sep` argument -- the former `sep = ""` was simply
# pasted on as one more (empty) string -- so it is left out here.
monster_urls = ranking_pages %>% 
    html_nodes(xpath = urls_xpath) %>%
    html_attr(name = "href") %>%
    unique() %>%
    (function(x) paste0(base_url, x))
monster_urls[1:3]
## [1] "http://dqmsl-search.net/monster/detail?no=501"
## [2] "http://dqmsl-search.net/monster/detail?no=521"
## [3] "http://dqmsl-search.net/monster/detail?no=543"

ETL is omitted.

Preprocessing

# Load the monster table from disk.
monsters = fread("data/monsters.csv")
# These columns are left untouched; every other column is converted to numeric.
character_cols = c("id", "name", "rank", "system", "type")
numeric_cols = names(monsters)[!names(monsters) %in% character_cols]
monsters[, c(numeric_cols) := lapply(.SD, as.numeric), .SDcols = numeric_cols]
# Sum of the six base stats.
monsters[, total := hp + mp + str + def + agi + int]
# HTML <img> tag per monster: the file name is the id left-padded with "0" to
# six characters and the alt text is the id. The format has exactly two %s, so
# only two values are passed; the extra `name` argument that used to follow was
# never consumed and makes sprintf() warn about an unused argument.
monsters[, icon := sprintf('<img src="img/icon/%s.gif" alt="%s" width="40">', 
                           str_pad(id, width = 6, pad = "0"), id)]
# Replace the plain name with a link to the monster's detail page.
monsters[, name := sprintf("<a href='http://dqmsl-search.net/monster/detail?no=%s'>%s</a>",
                           id, name)]
# Likes and hates as a percentage of views.
monsters[, `:=`(like_percent = like / view * 100, 
                hate_percent = hate / view * 100)]

Monsters' Data

# Paged googleVis table of the monster data, five rows per page.
mtable = gvisTable(
    data = monsters,
    options = list(page = "enable", pageSize = 5)
)
print(mtable, tag = "chart")

Plot categorical data with barchart

# Print a googleVis bar chart showing, for one column of a data.table, the
# percentage of rows that fall into each distinct value of that column.
#
# Arguments:
#   dt       data.table to summarise.
#   colname  name of the grouping column, given as a string.
#   height   chart height passed to the googleVis options (default 400).
#   width    chart width passed to the googleVis options (default 900).
#
# Called for its side effect: the "chart" part of the gvis object is printed.
print_bar = function(dt, colname, height = 400, width = 900) {
    # Summarise the table that was passed in. The body used to read the global
    # `monsters` here, which silently ignored the `dt` argument.
    percent = dt[, .(percent = .N), by = colname][
        , percent := percent / sum(percent) * 100]
    print(gvisBarChart(percent, options = list(height = height, width = width)), 
          "chart")
}

Rank distribution

# Share of monsters per rank.
print_bar(dt = monsters, colname = "rank")

System distribution

# Share of monsters per system.
print_bar(dt = monsters, colname = "system")

Type distribution

# Share of monsters per type.
print_bar(dt = monsters, colname = "type")

System vs Type

# Contingency table of counts: system in the rows, type in the columns.
system_type = table(monsters[["system"]], monsters[["type"]])
print(system_type)
##             
##              万能 回復 攻撃 特殊 補助 防御 魔法
##   悪魔系        5    3   32   10   17    3   52
##   物質系        0    4   36    7   19   26   10
##   ドラゴン系    2    2   64    1    4   10    1
##   スライム系    6   11   24   10    5   11    3
##   ???系      5    0   16   12    0    0   13
##   ゾンビ系      3    2   27    1    6    4    6
##   自然系        7    6   27    5   19   17    7
##   転生系        0    0    0   35    0    0    0
##   魔獣系        5    3   70    6   21    8    7

Stacked barchart

# Turn the counts into row percentages (share of each type within a system),
# rounded to two decimals, and convert the result to a long data.table.
system_type_dt = as.data.table(
    round(100 * prop.table(system_type, margin = 1), digits = 2)
)
# Name the three columns; note that "count" holds the percentage value.
setnames(system_type_dt, c("system", "type", "count"))
# Reshape to wide form: one row per system, one column per type.
system_type_dt = dcast(system_type_dt, system ~ type, value.var = "count")
# Every column other than `system` is plotted as one stacked series.
yvar = setdiff(names(system_type_dt), "system")
print(
    gvisBarChart(
        system_type_dt,
        xvar = "system",
        yvar = yvar,
        options = list(isStacked = TRUE, height = 300, width = 900)
    ),
    tag = "chart"
)

Correspondence analysis

# Correspondence analysis of the system-by-type contingency table.
cafit = ca(system_type)
# One table for the scatter chart. Both point sets share Dim1 as the x value;
# the second coordinate goes into the `system` series for row points and into
# the `type` series for column points, with the other series left NA. The
# *.html.tooltip columns carry the label shown for each point.
ca_dt = rbind(
    data.table(
        Dim1 = cafit$rowcoord[, 1],
        system = cafit$rowcoord[, 2],
        system.html.tooltip = rownames(system_type),
        type = NA_real_,
        type.html.tooltip = NA_character_
    ),
    data.table(
        Dim1 = cafit$colcoord[, 1],
        system = NA_real_,
        system.html.tooltip = NA_character_,
        type = cafit$colcoord[, 2],
        type.html.tooltip = colnames(system_type)
    )
)
# Same tick marks on both axes.
tick_str = "{'ticks': [-5, -4, -3, -2, -1, 0, 1, 2] }"
ca_plot = gvisScatterChart(
    data = ca_dt,
    options = list(
        width = 500,
        height = 500,
        hAxis = tick_str,
        vAxis = tick_str
    )
)

Correspondence analysis - 2

# Emit only the chart part of the correspondence-analysis plot.
print(ca_plot, tag = "chart")

Weight

# Flat contingency table: weight in the rows, rank in the columns.
ftable(rank ~ weight, data = monsters) %>%
    print()
##        rank   A   B   C   D   E   F   S  SS
## weight                                     
## 2             0   0   0  71  38  15   0   0
## 3             0   0  80   0   0   0   0   0
## 6             0 111   0   0   0   0   0   0
## 9           170   0   0   0   0   0   0   0
## 14            0   0   0   0   0   0  58   0
## 18            0   0   0   0   0   0 108   0
## 23            0   0   0   0   0   0   9   0
## 27            0   0   0   0   0   0   0  47
## 32            0   0   0   0   0   0   0   9

Multidimensional scaling

Here we take the SS-rank monsters and compute the distances between them based on hp, mp, str, def, agi and int.

# The six stats used to compare monsters, and the names of their scaled copies.
body_cols = c("hp", "mp", "str", "def", "agi", "int")
scaled_body_cols = paste0("scaled_", body_cols)
# Work on the SS-rank monsters only.
ss = copy(monsters[rank %in% c("SS"), ])
# Show a larger icon (120 instead of 40 wide) for this subset.
ss[, icon := str_replace(icon, 'width=\"40\"', 'width=\"120\"')]
# Standardise each stat. scale() returns a one-column matrix that carries
# centering/scaling attributes, so as.numeric() is applied to store ordinary
# numeric vectors as the new columns instead of matrices.
ss[, c(scaled_body_cols) := lapply(.SD, function(x) as.numeric(scale(x))),
   .SDcols = body_cols]
# Pairwise distances between monsters on the standardised stats.
ss_dist = dist(ss[, scaled_body_cols, with = FALSE])

# Classical multidimensional scaling of the distance matrix into two
# dimensions, keeping the eigenvalues in the result.
fit = cmdscale(d = ss_dist, k = 2, eig = TRUE)
# Coordinates of each monster plus its icon markup as the point's tooltip.
plot_dt = data.table(
    Dim1 = fit[["points"]][, 1],
    Dim2 = fit[["points"]][, 2],
    Dim2.html.tooltip = ss[["icon"]]
)

# Scatter chart with HTML tooltips enabled and the legend hidden.
plot_out = gvisScatterChart(
    data = plot_dt,
    options = list(
        tooltip = "{isHtml:'true'}",
        width = 500,
        height = 500,
        legend = '{"position": "none"}'
    )
)

Multidimensional scaling - 2

# Emit only the chart part of the scaling plot.
print(plot_out, tag = "chart")

Compute mean for each type

# Mean of every stat within each type of SS monster.
ss_type_mean = ss[, lapply(.SD, mean), by = "type", .SDcols = body_cols]
# Long form: one row per (type, stat) pair ...
ss_type_mean = melt(ss_type_mean, id.vars = "type")
# ... then wide again with one row per stat and one column per type.
ss_type_mean = dcast(ss_type_mean, variable ~ type, value.var = "value")
# Line chart with the stat on the x axis and one line per type.
plot_out = gvisLineChart(
    ss_type_mean,
    xvar = "variable",
    options = list(height = 300, width = 800)
)
print(plot_out, tag = "chart")

最後

在勇者鬥惡龍的世界,與怪物們一起冒險吧!輸入邀請碼「BLNEeQbw」即可獲得豪華獎勵!

謝謝大家(有機會到這一頁嗎XD)